Para esta analise, vamos usar um conjunto de dados coletados do Portal da Saude do Governo Federal.
http://combateaedes.saude.gov.br/pt/situacao-epidemiologica
Todo o projeto sera descrito de acordo com suas etapas. Os acentos foram ignorados para evitar problemas de interpretacao em diferentes sistemas operacionais.
# Carregando os pacotes
# devtools::install_github("wch/webshot")
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.2
# List the CSV files in the working directory and keep their names.
# `pattern` is a regular expression, so the dot is escaped and the match is
# anchored to the end of the name; a bare ".csv" would also match names that
# merely contain "csv" after any character (for example "data.csv.bak").
temp_files <- list.files(pattern = "\\.csv$")
temp_files
## [1] "Epidemiological_Bulletin-2016-04-02.csv"
## [2] "Epidemiological_Bulletin-2016-04-23.csv"
## [3] "Epidemiological_Bulletin-2016-04-30.csv"
## [4] "Epidemiological_Bulletin-2016-05-07.csv"
## [5] "Epidemiological_Bulletin-2016-05-14.csv"
## [6] "Epidemiological_Bulletin-2016-05-21.csv"
## [7] "Epidemiological_Bulletin-2016-05-28.csv"
## [8] "Epidemiological_Bulletin-2016-06-11.csv"
# Read every listed file into one list of data frames, keeping text columns
# as character vectors rather than factors.
myfiles <- lapply(
  temp_files,
  function(path) read.csv(path, stringsAsFactors = FALSE)
)
# Overview of the list, limited to the first nesting level.
str(myfiles, max.level = 1)
## List of 8
## $ :'data.frame': 33 obs. of 9 variables:
## $ :'data.frame': 33 obs. of 9 variables:
## $ :'data.frame': 33 obs. of 9 variables:
## $ :'data.frame': 33 obs. of 9 variables:
## $ :'data.frame': 33 obs. of 9 variables:
## $ :'data.frame': 33 obs. of 9 variables:
## $ :'data.frame': 33 obs. of 9 variables:
## $ :'data.frame': 33 obs. of 9 variables:
# Column names of the first file only.
lapply(myfiles[1], names)
## [[1]]
## [1] "report_date" "location" "location_type"
## [4] "data_field" "data_field_code" "time_period"
## [7] "time_period_type" "value" "unit"
# First two rows of each of the first two files.
lapply(myfiles[1:2], head, n = 2)
## [[1]]
## report_date location location_type data_field data_field_code
## 1 2016-04-02 Norte region zika_reported BR0011
## 2 2016-04-02 Brazil-Rondonia state zika_reported BR0011
## time_period time_period_type value unit
## 1 NA NA 6295 cases
## 2 NA NA 618 cases
##
## [[2]]
## report_date location location_type data_field data_field_code
## 1 2016-04-23 Norte region zika_reported BR0011
## 2 2016-04-23 Brazil-Acre state zika_reported BR0011
## time_period time_period_type value unit
## 1 NA NA 8545 cases
## 2 NA NA 716 cases
# Stack the per-file data frames into a single one and parse report_date
# as a Date.
brazil <- do.call(rbind, myfiles) %>%
  mutate(report_date = as.Date(report_date))
## Warning: package 'bindrcpp' was built under R version 3.4.1
# Inspect the combined dataset.
brazil %>% glimpse()
## Observations: 264
## Variables: 9
## $ report_date <date> 2016-04-02, 2016-04-02, 2016-04-02, 2016-04-...
## $ location <chr> "Norte", "Brazil-Rondonia", "Brazil-Acre", "B...
## $ location_type <chr> "region", "state", "state", "state", "state",...
## $ data_field <chr> "zika_reported", "zika_reported", "zika_repor...
## $ data_field_code <chr> "BR0011", "BR0011", "BR0011", "BR0011", "BR00...
## $ time_period <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ time_period_type <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ value <int> 6295, 618, 375, 1520, 44, 771, 74, 2893, 3028...
## $ unit <chr> "cases", "cases", "cases", "cases", "cases", ...
# Drop the two time-period columns (positions 6 and 7 in the files), which
# appear to contain only NA in this data. They are removed by name so the step
# does not depend on the column order of the CSV files.
brazil <- brazil %>%
  select(-time_period, -time_period_type)
# Show the first 20 rows.
slice(brazil, 1:20)
## # A tibble: 20 x 7
## report_date location location_type data_field
## <date> <chr> <chr> <chr>
## 1 2016-04-02 Norte region zika_reported
## 2 2016-04-02 Brazil-Rondonia state zika_reported
## 3 2016-04-02 Brazil-Acre state zika_reported
## 4 2016-04-02 Brazil-Amazonas state zika_reported
## 5 2016-04-02 Brazil-Roraima state zika_reported
## 6 2016-04-02 Brazil-Para state zika_reported
## 7 2016-04-02 Brazil-Amapa state zika_reported
## 8 2016-04-02 Brazil-Tocantins state zika_reported
## 9 2016-04-02 Nordeste region zika_reported
## 10 2016-04-02 Brazil-Maranhao state zika_reported
## 11 2016-04-02 Brazil-Piaui state zika_reported
## 12 2016-04-02 Brazil-Ceara state zika_reported
## 13 2016-04-02 Brazil-Rio_Grande_do_Norte state zika_reported
## 14 2016-04-02 Brazil-Paraiba state zika_reported
## 15 2016-04-02 Brazil-Pernambuco state zika_reported
## 16 2016-04-02 Brazil-Alagoas state zika_reported
## 17 2016-04-02 Brazil-Sergipe state zika_reported
## 18 2016-04-02 Brazil-Bahia state zika_reported
## 19 2016-04-02 Sudeste region zika_reported
## 20 2016-04-02 Brazil-Minas_Gerais state zika_reported
## # ... with 3 more variables: data_field_code <chr>, value <int>,
## # unit <chr>
# Every report_date comes with five region rows.
filter(brazil, location_type == "region")
## report_date location location_type data_field data_field_code
## 1 2016-04-02 Norte region zika_reported BR0011
## 2 2016-04-02 Nordeste region zika_reported BR0011
## 3 2016-04-02 Sudeste region zika_reported BR0011
## 4 2016-04-02 Sul region zika_reported BR0011
## 5 2016-04-02 Centro-Oeste region zika_reported BR0011
## 6 2016-04-23 Norte region zika_reported BR0011
## 7 2016-04-23 Nordeste region zika_reported BR0011
## 8 2016-04-23 Sudeste region zika_reported BR0011
## 9 2016-04-23 Sul region zika_reported BR0011
## 10 2016-04-23 Centro-Oeste region zika_reported BR0011
## 11 2016-04-30 Norte region zika_reported BR0011
## 12 2016-04-30 Nordeste region zika_reported BR0011
## 13 2016-04-30 Sudeste region zika_reported BR0011
## 14 2016-04-30 Sul region zika_reported BR0011
## 15 2016-04-30 Centro-Oeste region zika_reported BR0011
## 16 2016-05-07 Norte region zika_reported BR0011
## 17 2016-05-07 Nordeste region zika_reported BR0011
## 18 2016-05-07 Sudeste region zika_reported BR0011
## 19 2016-05-07 Sul region zika_reported BR0011
## 20 2016-05-07 Centro-Oeste region zika_reported BR0011
## 21 2016-05-14 Norte region zika_reported BR0011
## 22 2016-05-14 Nordeste region zika_reported BR0011
## 23 2016-05-14 Sudeste region zika_reported BR0011
## 24 2016-05-14 Sul region zika_reported BR0011
## 25 2016-05-14 Centro-Oeste region zika_reported BR0011
## 26 2016-05-21 Norte region zika_reported BR0011
## 27 2016-05-21 Nordeste region zika_reported BR0011
## 28 2016-05-21 Sudeste region zika_reported BR0011
## 29 2016-05-21 Sul region zika_reported BR0011
## 30 2016-05-21 Centro-Oeste region zika_reported BR0011
## 31 2016-05-28 Norte region zika_reported BR0011
## 32 2016-05-28 Nordeste region zika_reported BR0011
## 33 2016-05-28 Sudeste region zika_reported BR0011
## 34 2016-05-28 Sul region zika_reported BR0011
## 35 2016-05-28 Centro-Oeste region zika_reported BR0011
## 36 2016-06-11 Norte region zika_reported BR0011
## 37 2016-06-11 Nordeste region zika_reported BR0011
## 38 2016-06-11 Sudeste region zika_reported BR0011
## 39 2016-06-11 Sul region zika_reported BR0011
## 40 2016-06-11 Centro-Oeste region zika_reported BR0011
## value unit
## 1 6295 cases
## 2 30286 cases
## 3 35505 cases
## 4 1797 cases
## 5 17504 cases
## 6 8545 cases
## 7 43000 cases
## 8 46318 cases
## 9 2197 cases
## 10 20101 cases
## 11 8379 cases
## 12 47709 cases
## 13 48027 cases
## 14 2343 cases
## 15 21364 cases
## 16 8053 cases
## 17 51065 cases
## 18 54803 cases
## 19 2431 cases
## 20 21756 cases
## 21 8053 cases
## 22 51065 cases
## 23 54803 cases
## 24 2431 cases
## 25 21756 cases
## 26 8432 cases
## 27 54165 cases
## 28 61309 cases
## 29 2491 cases
## 30 22508 cases
## 31 9022 cases
## 32 59745 cases
## 33 65328 cases
## 34 2463 cases
## 35 24683 cases
## 36 10645 cases
## 37 61829 cases
## 38 65820 cases
## 39 2392 cases
## 40 25246 cases
# Line chart of the reported values over time, one series per region.
ggplot(
  data = filter(brazil, location_type == "region"),
  mapping = aes(
    x = report_date,
    y = value,
    group = location,
    color = location
  )
) +
  geom_line() +
  geom_point() +
  ggtitle("Casos de Zika por Regiao do Brasil")
# Keep only the region rows, then draw a bar chart of reported cases by region.
# NOTE(review): `region` holds a row per region for every report_date, so each
# bar stacks the values of all report dates -- confirm this is intended.
region <- filter(brazil, location_type == "region")
ggplot(region, aes(x = location, y = value)) +
  geom_bar(stat = "identity") +
  xlab("Region") +
  ylab("Numero de Casos Reportados") +
  ggtitle("Casos de Zika Reportados no Brasil")
# Same bar chart restricted to the first row of each region, with the bars
# ordered from most to fewest reported cases.
# NOTE(review): taking the first n rows assumes `region` starts with exactly
# one row per region -- confirm against the data ordering.
region %>%
  slice(seq_len(length(unique(region$location)))) %>%
  arrange(desc(value)) %>%
  mutate(location = ordered(location, levels = location)) %>%
  ggplot(aes(x = location, y = value)) +
  geom_bar(stat = "identity") +
  xlab("Region") +
  ylab("Numero de Casos Reportados") +
  ggtitle("Casos de Zika Reportados no Brasil")
# One row per distinct location: the first n rows of `region`, where n is the
# number of distinct locations.
region %>%
  slice(seq_len(length(unique(region$location))))
## # A tibble: 5 x 7
## report_date location location_type data_field data_field_code
## <date> <chr> <chr> <chr> <chr>
## 1 2016-04-02 Norte region zika_reported BR0011
## 2 2016-04-02 Nordeste region zika_reported BR0011
## 3 2016-04-02 Sudeste region zika_reported BR0011
## 4 2016-04-02 Sul region zika_reported BR0011
## 5 2016-04-02 Centro-Oeste region zika_reported BR0011
## # ... with 2 more variables: value <int>, unit <chr>
# The same rows sorted by number of reported cases, largest first.
region %>%
  slice(seq_len(length(unique(region$location)))) %>%
  arrange(desc(value))
## # A tibble: 5 x 7
## report_date location location_type data_field data_field_code
## <date> <chr> <chr> <chr> <chr>
## 1 2016-04-02 Sudeste region zika_reported BR0011
## 2 2016-04-02 Nordeste region zika_reported BR0011
## 3 2016-04-02 Centro-Oeste region zika_reported BR0011
## 4 2016-04-02 Norte region zika_reported BR0011
## 5 2016-04-02 Sul region zika_reported BR0011
## # ... with 2 more variables: value <int>, unit <chr>
# Turn location into an ordered factor whose levels follow the case ranking,
# then inspect the result.
region %>%
  slice(seq_len(length(unique(region$location)))) %>%
  arrange(desc(value)) %>%
  mutate(location = ordered(location, levels = location)) %>%
  glimpse()
## Observations: 5
## Variables: 7
## $ report_date <date> 2016-04-02, 2016-04-02, 2016-04-02, 2016-04-0...
## $ location <ord> Sudeste, Nordeste, Centro-Oeste, Norte, Sul
## $ location_type <chr> "region", "region", "region", "region", "region"
## $ data_field <chr> "zika_reported", "zika_reported", "zika_report...
## $ data_field_code <chr> "BR0011", "BR0011", "BR0011", "BR0011", "BR0011"
## $ value <int> 35505, 30286, 17504, 6295, 1797
## $ unit <chr> "cases", "cases", "cases", "cases", "cases"
# Grouping and summarising.
# Rows whose location is the whole country.
brazil_totals <- brazil %>%
  filter(location == "Brazil")
# Reported cases per report date and region. summarize() only peels off the
# last grouping variable, so the result is explicitly ungrouped to keep the
# leftover report_date grouping from affecting later operations.
region_totals <- brazil %>%
  filter(location_type == "region") %>%
  group_by(report_date, location) %>%
  summarize(tot = sum(value)) %>%
  ungroup()
# Label every row of `brazil` with a region: a "region" row is labelled with
# its own location, and any other row takes the location of the closest
# region row above it.
# Vectorised replacement for a row-by-row loop. Rows that come before the
# first region row get NA (the loop failed there on an undefined label), and
# an empty data frame yields an empty vector.
regvec <- local({
  # %in% treats a missing location_type as "not a region" instead of NA.
  is_region <- brazil$location_type %in% "region"
  # Row index of the latest region row seen so far; 0 until one appears.
  last_region_row <- cummax(ifelse(is_region, seq_along(is_region), 0L))
  # Index 0 would silently drop the element; NA keeps the position and
  # produces an NA label.
  last_region_row[last_region_row == 0L] <- NA_integer_
  brazil$location[last_region_row]
})
# Attach the region labels to the brazil data frame as column `regvec`.
statedf <- cbind(brazil, regvec)
# Drop the country-level and region-level summary rows.
statedf <- statedf %>%
  filter(location != "Brazil", location_type != "region")
# Total reported cases per report date and region, rebuilt from the remaining
# rows. The result is ungrouped so the report_date grouping left behind by
# summarize() does not affect later operations on `totals`.
totals <- statedf %>%
  group_by(report_date, regvec) %>%
  summarize(tot = sum(value)) %>%
  ungroup()
# Gerando os mapas de cada estado do Brasil
#install.packages('ggmap')
library(ggmap)
## Warning: package 'ggmap' was built under R version 3.4.2
# Geocode each distinct location and record the queried name in `loc`.
# NOTE(review): a failed lookup (see the OVER_QUERY_LIMIT warning in the
# output) presumably comes back with missing coordinates -- confirm how those
# rows should be handled downstream.
longlat <- unique(statedf$location) %>%
  geocode() %>%
  mutate(loc = unique(statedf$location))
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Rondonia&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Acre&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Amazonas&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Roraima&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Para&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Amapa&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Tocantins&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Maranhao&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Piaui&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Ceara&sensor=false
## .Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Rio_Grande_do_Norte&sensor=false
## .Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Paraiba&sensor=false
## Warning: geocode failed with status OVER_QUERY_LIMIT, location = "Brazil-
## Paraiba"
## .Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Pernambuco&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Alagoas&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Sergipe&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Bahia&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Minas_Gerais&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Espirito_Santo&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Rio_de_Janeiro&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Sao_Paulo&sensor=false
## .Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Parana&sensor=false
## .Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Santa_Catarina&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Rio_Grande_do_Sul&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Mato_Grosso_do_Sul&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Mato_Grosso&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Goias&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Brazil-Distrito_Federal&sensor=false
# Cases per location on the 2016-06-11 report, joined with the geocodes and
# given a "lat:lon" text column; the result is stored in `formapping`.
formapping <- statedf %>%
  filter(as.character(report_date) == "2016-06-11") %>%
  group_by(location) %>%
  summarize(cases = sum(value)) %>%
  inner_join(longlat, by = c("location" = "loc")) %>%
  mutate(LatLon = paste(lat, lon, sep = ":"))
# Preview the first rows of the mapping table.
formapping %>% head()
## # A tibble: 6 x 5
## location cases lon lat LatLon
## <chr> <int> <dbl> <dbl> <chr>
## 1 Brazil-Acre 846 -70.81200 -9.0237964 -9.0237964:-70.8119953
## 2 Brazil-Alagoas 3847 -36.78195 -9.5713058 -9.5713058:-36.7819505
## 3 Brazil-Amapa 189 -52.00296 0.9019925 0.9019925:-52.0029565
## 4 Brazil-Amazonas 3713 -65.85606 -3.4168427 -3.4168427:-65.8560646
## 5 Brazil-Bahia 46427 -41.70073 -12.5797380 -12.579738:-41.7007272
## 6 Brazil-Ceara 2358 -39.32062 -5.4983977 -5.4983977:-39.3206241
# Expand `formapping` into `long_formapping`: every row is repeated as many
# times as its `cases` value, giving one row per reported case.
num_of_times_to_repeat <- formapping$cases
long_formapping <- formapping[
  rep(seq_along(num_of_times_to_repeat), times = num_of_times_to_repeat),
]
# Preview the first rows of the expanded table.
long_formapping %>% head()
## # A tibble: 6 x 5
## location cases lon lat LatLon
## <chr> <int> <dbl> <dbl> <chr>
## 1 Brazil-Acre 846 -70.812 -9.023796 -9.0237964:-70.8119953
## 2 Brazil-Acre 846 -70.812 -9.023796 -9.0237964:-70.8119953
## 3 Brazil-Acre 846 -70.812 -9.023796 -9.0237964:-70.8119953
## 4 Brazil-Acre 846 -70.812 -9.023796 -9.0237964:-70.8119953
## 5 Brazil-Acre 846 -70.812 -9.023796 -9.0237964:-70.8119953
## 6 Brazil-Acre 846 -70.812 -9.023796 -9.0237964:-70.8119953
# Instalando o pacote leaflet
# install.packages("leaflet")
library(leaflet)
## Warning: package 'leaflet' was built under R version 3.4.2
# Draw the map from the expanded data frame with clustered markers.
# Zoom in on the map to explore it.
leaflet(data = long_formapping) %>%
  addTiles() %>%
  addMarkers(clusterOptions = markerClusterOptions())
## Assuming 'lon' and 'lat' are longitude and latitude, respectively
## Warning in validateCoords(lng, lat, funcName): Data contains 2889 rows with
## either missing or invalid lat/lon values and will be ignored